*******************************************************************************
* .do-file to adjust the file that includes all waves combined
* for example: create add. variables, make variables consistent across years
* the final dataset from this file is used to carry out analysis
*******************************************************************************

		clear all
		set maxvar 6000
				
		cd "$scfwkddata"
		use all_adj, clear

*************************************************************
************** create single/ couple indicator **************
*************************************************************

// work with martref1 to identify cohabiting couples (not possible in defintion of martref)

// drop all observations for whom no information on martref1 is available 

	    drop if martref1 == . | martref1 > 6  // not coded as category in codebook - 40 observations

// 1: married, 2: living with partner, 3: separated, 4: divorced, 5: widowed, 6: never married 

// SINGLE	
		gen single = 3  if martref1 == 3 | martref1 == 4 | martref1 == 5 | martref1 == 6  // single indicator
		replace single = 0 if single == 3 & sexref == 1  // single men 
		replace single = 1 if single == 3 & sexref == 2  // single women 

// COUPLE

		gen couple = 1 if martref1 == 1 | martref1 == 2 // both married and living with a partner
		
// FAMILY TYPE

		gen fam_type = .
		replace fam_type = 0 if single == 0  // single men
		replace fam_type = 1 if single == 1  // single women 
		replace fam_type = 2 if single == .  // couples 


*************************************************************
****************** year-to-year changes in data *************
*************************************************************

		// hometype of principal residence: recode as either house/apartment or other (such as RV, mh, ranch)
		replace hometype     = 1  if hometype     == 4 | hometype == 5 
		replace owntype_farm = -7 if owntype_farm == 6

		// exclude own foundation (variable no available prior to 2007), and prepaid (var not available prior to 2016)
		drop found prepaid
		
		// mutual funds: category 'other mutual fund' not availabe before 2001
		// will be labeled as risky bc of note in codebook: almost entirely hedgefunds, small amount of ETFs
		replace omutf = 0 if omutf == .
		
		// annuities and trusts: before 1995, coded as one varible: make consistent 
		replace ann_trust = annuit+trusts if year >= 1998
		
		// _r refers to "risky", _s refers to "safe"
		gen annuit_r = 0
		gen annuit_s = 0
		gen trusts_r = 0
		gen trusts_s = 0
		gen ann_trust_r = 0
		gen ann_trust_s = 0
		
		// for categorization: after 2004, use information on fraction invested in stocks
		// and information on how invested: _invest = 3 split, = 1 all in stocks, = 2 all in bonds
		
		replace annuit_r = annuit_ps*annuit     if annuit_invest == 3 & year > 2004
		replace annuit_s = (1-annuit_ps)*annuit if annuit_invest == 3 & year > 2004
		
		replace annuit_r = annuit if annuit_invest == 1 & year > 2004
		replace annuit_s = annuit if annuit_invest == 2 & year > 2004
		
		replace trusts_r = trusts_ps*trusts     if trusts_invest == 3 & year > 2004
		replace trusts_s = (1-trusts_ps)*trusts if trusts_invest == 3 & year > 2004
		
		replace trusts_r = trusts if trusts_invest == 1 & year > 2004
		replace trusts_s = trusts if trusts_invest == 2 & year > 2004
		
		replace ann_trust_r = annuit_r + trusts_r if year > 2004
		replace ann_trust_s = annuit_s + trusts_s if year > 2004

		// for categorization: in 2004, no information on exact splitting rule, use 50/50 as proxy 
		replace annuit_r = 0.5*annuit if annuit_invest == 3 & year == 2004
		replace annuit_s = 0.5*annuit if annuit_invest == 3 & year == 2004
		
		replace annuit_r = annuit if annuit_invest == 1		& year == 2004
		replace annuit_s = annuit if annuit_invest == 2		& year == 2004
		
		replace trusts_r = 0.5*trusts if trusts_invest == 3 & year == 2004
		replace trusts_s = 0.5*trusts if trusts_invest == 3 & year == 2004
		
		replace trusts_r = trusts if trusts_invest == 1		& year == 2004
		replace trusts_s = trusts if trusts_invest == 2		& year == 2004
		
		replace ann_trust_r = annuit_r + trusts_r			if year == 2004
		replace ann_trust_s = annuit_s + trusts_s			if year == 2004

		// for categorization: 1998 - 2001: no information on exact splitting rule, use 50/50 as proxy 
		// _invest = 5  split, = 1 all in stocks, = 2 all in bonds
		replace annuit_r = 0.5*annuit if annuit_invest == 5 & year < 2004 & year > 1995
		replace annuit_s = 0.5*annuit if annuit_invest == 5 & year < 2004 & year > 1995
		
		replace annuit_r = annuit if annuit_invest == 1		& year < 2004 & year > 1995
		replace annuit_s = annuit if annuit_invest == 2		& year < 2004 & year > 1995
		
		replace trusts_r = 0.5*trusts if trusts_invest == 5 & year < 2004 & year > 1995
		replace trusts_s = 0.5*trusts if trusts_invest == 5 & year < 2004 & year > 1995
		
		replace trusts_r = trusts if trusts_invest == 1		& year < 2004 & year > 1995
		replace trusts_s = trusts if trusts_invest == 2		& year < 2004 & year > 1995
		
		replace ann_trust_r = annuit_r + trusts_r			if year < 2004 & year > 1995
		replace ann_trust_s = annuit_s + trusts_s 			if year < 2004 & year > 1995
		replace ann_trust  = annuit + trusts				if year < 2004 & year > 1995

		// for categorization: no information on how invested before 1998, extrapolate from later years
		egen riskyshare = mean(ann_trust_r/ann_trust) if year >= 1998
		egen safeshare  = mean(ann_trust_s/ann_trust) if year >= 1998
		
		egen riskyshare2 = max(riskyshare)
		egen safeshare2  = max(safeshare)
		
		replace ann_trust_r = riskyshare2*ann_trust if year < 1998
		replace ann_trust_s = safeshare2*ann_trust  if year < 1998
		drop riskyshare* safeshare*
		
		
		// IRA/KEOGH accounts (roth IRAs, roll-over IRAs, regular IRAs, Keogh)
		// _r refers to "risky", _s refers to "safe"
		gen IRAKH_r = 0 
		gen IRAKH_s = 0 
		
		// for categorization: after 2001, use information on fraction invested in stocks
		// _invest = 1 all in stocks, _invest = 2 all in bonds, _invest = 3 split
		foreach x in ref sp oth {
		gen IRAKH_`x'_r = 0
		gen IRAKH_`x'_s = 0
		
		replace IRAKH_`x'_r = IRAKH_`x'_ps*IRAKH_`x' if IRAKH_`x'_invest == 3 & year > 2001
		replace IRAKH_`x'_s = (1-IRAKH_`x'_ps)*IRAKH_`x'  if IRAKH_`x'_invest == 3 & year > 2001
		
		replace IRAKH_`x'_r = IRAKH_`x' if IRAKH_`x'_invest == 1 & year > 2001
		replace IRAKH_`x'_s = IRAKH_`x' if IRAKH_`x'_invest == 2 & year > 2001
		}
		
		replace IRAKH_r =  IRAKH_ref_r + IRAKH_sp_r + IRAKH_oth_r if year > 2001
		replace IRAKH_s =  IRAKH_ref_s + IRAKH_sp_s + IRAKH_oth_s if year > 2001
		
		// for categorization: up until 2001, cannot identify individual accounts (i.e. ref/spouse/other) and not what % in stocks
		// _invest = 1 bank accounts, _invest = 2 stock, _invest = 3 bonds , _invest = 4,5,6 split
		
		replace IRAKH_r  = 0.5*IRAKH if  IRAKH_invest == 4 & year <= 2001 | IRAKH_invest == 5 & year <= 2001 | ///
									     IRAKH_invest == 6 & year <= 2001
		replace IRAKH_r  = 0.5*IRAKH if  IRAKH_invest == 4 & year <= 2001 | IRAKH_invest == 5 & year <= 2001 | ///
									     IRAKH_invest == 6 & year <= 2001
		
		replace IRAKH_r  = IRAKH if IRAKH_invest == 2	& year <= 2001
		replace IRAKH_s  = IRAKH if IRAKH_invest == 1	& year <= 2001 | IRAKH_invest == 3 & year <= 2001 
	
		// Future pensions
		// before 2004, no variable 'additional account' for future pensions: therefore, not included in analysis.
		// before 2004, cannot identify what % is in stocks, only how the money is invested
		
		/* Question on value: How much money is currently in this account (from which you have rights to 
		*receive pension payments (both lump-sum or regular payments) in the future? */

		// _r refers to "risky", _s refers to "safe"
		gen futpen_r = 0 
		gen futpen_s = 0 
			
		// after 2007: aggregate information on individual accounts
		// _invest = 1 stocks, _invest = 2 bonds, _invest = 3 split
		foreach num of numlist 1/4 {
		replace futpen_r = futpen_r + futpen`num'_ps*futpen`num'      if futpen_invest`num' == 3 & year > 2007
		replace futpen_s = futpen_s + (1-futpen`num'_ps)*futpen`num'  if futpen_invest`num' == 3 & year > 2007
		
		replace futpen_r = futpen_r + futpen`num' if futpen_invest`num' == 1 & year > 2007
		replace futpen_s = futpen_s + futpen`num' if futpen_invest`num' == 2 & year > 2007
		}
		
		// from 2001 until 2007: aggregate information on individual accounts 
		// _invest = 1 stocks, _invest = 2 bonds, _invest = 3 split
		foreach num of numlist 1/6 {
		replace futpen_r = futpen_r + futpen`num'_ps*futpen`num'      if futpen_invest`num' == 3 & year <= 2007 & year > 2001
		replace futpen_s = futpen_s + (1-futpen`num'_ps)*futpen`num'  if futpen_invest`num' == 3 & year <= 2007 & year > 2001
		
		replace futpen_r = futpen_r + futpen`num' if futpen_invest`num' == 1 & year <= 2007 & year > 2001
		replace futpen_s = futpen_s + futpen`num' if futpen_invest`num' == 2 & year <= 2007 & year > 2001
		}
		
		// 2001: aggregate information on individual accounts (no info on what % in stocks): assume 50/50 splitting rule
		foreach num of numlist 1/6 {
		replace futpen_r = futpen_r + 0.5*futpen`num'  if futpen_invest`num' == 3 & year ==  2001
		replace futpen_s = futpen_s + 0.5*futpen`num'  if futpen_invest`num' == 3 & year == 2001
		
		replace futpen_r = futpen_r + futpen`num' if futpen_invest`num' == 1 & year == 2001
		replace futpen_s = futpen_s + futpen`num' if futpen_invest`num' == 2 & year == 2001
		}
	
		// before 1998: do not have any information on how invested, assume same share as in the years before
		egen riskyshare = mean(futpen_r/futpen) if year > 1998
		egen safeshare  = mean(futpen_s/futpen) if year > 1998
		
		egen riskyshare2 = max(riskyshare)
		egen safeshare2  = max(safeshare)
		
		replace futpen_r = riskyshare2*futpen if year <= 1998
		replace futpen_s = safeshare2*futpen  if year <= 1998
		drop riskyshare* safeshare*
		
		
		// Current job pensions (including thrifts)
		// before 2004, no information on what percent exactly in stocks
		// before 2010: 6 (3 per person) individual accounts listed, only 4 in later years
		// from 2001 onwards two variables on thrift: X4202 and X4226 (I use the latter, more comparable with later years)
		
		replace thrift5_ps = 0 if thrift5_ps == .
		replace thrift6_ps = 0 if thrift6_ps == .
		
		// _r refers to "risky", _s refers to "safe"
		gen thrift_r = 0 
		gen thrift_s = 0 
		gen riskyshare = 0
		gen safeshare = 0
			
		// after 2007: aggregate information on individual accounts 
		// _invest = 1 stocks, _invest = 2 bonds, _invest = 3 split
		foreach num of numlist 1/4 {
		replace thrift_r = thrift_r + thrift`num'_ps*thrift`num'      if thrift_invest`num' == 3 & year > 2007
		replace thrift_s = thrift_s + (1-thrift`num'_ps)*thrift`num'  if thrift_invest`num' == 3 & year > 2007
		
		replace thrift_r = thrift_r + thrift`num' if thrift_invest`num' == 1 & year > 2007
		replace thrift_s = thrift_s + thrift`num' if thrift_invest`num' == 2 & year > 2007
		}
		
		// add. account (no information on how invested - assume same share as for other accounts)
		replace riskyshare = thrift_r/thrift if year > 2007 & thrift > 0 
		replace safeshare  = thrift_s/thrift if year > 2007 & thrift > 0
		
		replace thrift_r = thrift_r + riskyshare*(thrift7+thrift8) if year > 2007
		replace thrift_s = thrift_s + safeshare*(thrift7+thrift8) if year > 2007
		
		// 2004 and 2007: aggregate information on individual accounts
		// _invest = 1 stocks, _invest = 2 bonds, _invest = 3 split
		foreach num of numlist 1/6 {
		replace thrift_r = thrift_r + thrift`num'_ps*thrift`num'      if thrift_invest`num' == 3 & year >= 2004 & year <= 2007
		replace thrift_s = thrift_s + (1-thrift`num'_ps)*thrift`num'  if thrift_invest`num' == 3 & year >= 2004 & year <= 2007
		
		replace thrift_r = thrift_r + thrift`num' if thrift_invest`num' == 1 & year >= 2004 & year <= 2007
		replace thrift_s = thrift_s + thrift`num' if thrift_invest`num' == 2 & year >= 2004 & year <= 2007
		}
		
		// add. account (no information on how invested - assume same share as for other accounts)
		replace riskyshare = thrift_r/thrift if year >= 2004 & year <= 2007 & thrift > 0
		replace safeshare  = thrift_s/thrift if year >= 2004 & year <= 2007 & thrift > 0
		
		replace thrift_r = thrift_r + riskyshare*(thrift7+thrift8) if year >= 2004 & year <= 2007
		replace thrift_s = thrift_s + safeshare*(thrift7+thrift8) if year >= 2004 & year <= 2007
		
		// before 2001: aggregate information on individual accounts (no info on what % in stocks, assume 50/50)
		foreach num of numlist 1/6 {
		replace thrift_r = thrift_r + 0.5*thrift`num'  if thrift_invest`num' == 3 & year <=  2001
		replace thrift_s = thrift_s + 0.5*thrift`num'  if thrift_invest`num' == 3 & year <= 2001
		
		replace thrift_r = thrift_r + thrift`num' if thrift_invest`num' == 1 & year <= 2001
		replace thrift_s = thrift_s + thrift`num' if thrift_invest`num' == 2 & year <= 2001
		}
		
		// add. account (no information on how invested - assume same share as for other accounts)
		replace riskyshare = thrift_r/thrift if year <= 2001 & thrift > 0
		replace safeshare  = thrift_s/thrift if year <= 2001 & thrift > 0
		
		replace thrift_r = thrift_r + riskyshare*(thrift7+thrift8) if year <= 2001
		replace thrift_s = thrift_s + safeshare*(thrift7+thrift8) if year <= 2001
		
		drop riskyshare safeshare
		
*******************************************************************
******* further information on variables and dataset  *************
*******************************************************************
		
		// Current Pension: 
		*no info on how much in account before 2001: do not include in 
		*analysis, to ensure consistency and because focus is on working age (note: future pensions are included)
		
		// Bonds
		*Saving bonds always as face value (fv), and in 1989 all other bonds also only report fv. therfore, all values across 
		* all years reported as fv

		// Savings and Money Market
		*categorize as safe assets, even though SCF collects information on how they are investet in years after 2004
		*when looking at categorization in years when available: fraction invested in stocks is zero for over 95% of all
		
		// Financial assets
		* ca. 5% of the sample report to have zero financial wealth, treated as zeros with risky share of zero.
		
		// Business Wealth
		* Business Wealth is excluded from the analysis

************************************************************************
*******create variables describing the household portfolio *************
************************************************************************

		// Gross Housing Wealth: refers to PRIMARY RESIDENCE 
		g housing = hval_mh1 + hval_mh2 + hval_mh3 + hval_house + ((10000-max(0,farm_frac))/10000)*(hval_farm1 + hval_farm2) 
		
		// all other residential properties 
		gen oresre  = land_contracts + vac_prop + oth_prop1 + oth_prop2 + oth_prop3

		g safe      = checking + savings + mm + call + cds + cashli + gbmutf + tfbmutf + 0.5*comutf + ///
						savbnd + mortbndf + notxbndf + govtbndf + ann_trust_s + IRAKH_s + futpen_s + thrift_s

		g risky     = stocks + stmutf + obmutf + 0.5*comutf + omutf + obndf + ann_trust_r + IRAKH_r + ///
					  futpen_r + thrift_r
			  
		// assets and debts
		g finassets = safe + risky
		
		g assets = finassets + vehic + housing + oresre 
		
		g debt = resdbt + othloc + ccbal + inst_loan + odebt + mrthel
		
		gen nw = assets - debt 
		
		// Stock Market Participation Rate
		g SMP 	    = 1 if risky > 0
		replace SMP = 0 if risky == 0 
		
		// risky share (over financial assets and net worth)
		g share_fin     = risky/finassets
		gen share       = risky/nw
		
		// part of net worth that is not invested in risky assets 
		g safe_nw = nw- risky
		
		// Households with zero financial assets/nw have a risky share of 0
		*** note: results are not senstive to dropping these observations *** 
		replace share_fin = 0 if finassets == 0 & nw != .
		replace share     = 0 if nw == 0
		
		// 'tighter' definition of financial assets: excluding pension wealth
		g safe2      = checking + savings + mm + call + cds + cashli + gbmutf + tfbmutf + 0.5*comutf + ///
						savbnd + mortbndf + notxbndf + govtbndf 

		g risky2     = stocks + stmutf + obmutf + 0.5*comutf + omutf + obndf 
		
		g finassets2 = safe2 + risky2
		
		g assets2    = finassets2 + vehic + housing + oresre 
		
		gen nw2 = assets2 - debt 

		g SMP2 	     = 1 if risky2 > 0
		replace SMP2 = 0 if risky2 == 0 
		
		g share2_fin     = risky2/finassets2
		gen share2       = risky2/nw2
		
		//  Households with zero financial assets have a risky share of 0
		replace share2_fin = 0 if finassets2 == 0
		replace share2 = 0 if nw2 == 0

*************************************************************
****************** winsorize variables  *********************
*************************************************************
		
		foreach var of varlist safe safe2 safe_nw share risky risky2 share2 share_fin share2_fin nw nw2 finassets finassets2 {
		winsor2 `var', cuts(1 99)
		replace `var' = `var'_w
		drop `var'_w
		} 
		
		
*************************************************************
************ deflate the financial variables (CPI) **********
*************************************************************

		// in 2007 dollars 
		
		ren year wave

		merge m:1 wave using "C:\Users\A2210267\Dropbox\1_Family_Finance\Data\SCF\data_adj\cpi_estimates.dta"
		drop _merge

		global list safe* risky* finassets* totinc* finc* hval* housing vehic oresre assets  ///
		land_contracts vac_prop oth_prop1 oth_prop2 oth_prop3 ///
		inher_expamt inher1_val inher2_val inher3_val inheroth_val inst_loan veh_loan educ_loan oth_install ///
		othloc odebt ccbal resdbt debt nh_mort heloc mrthel nw nw2
		
		foreach var of varlist $list {
		replace `var' = `var' / CPI
		}				
		
*************************************************************
********* final cleaning of dataset *************************
*************************************************************
		
		// code education of spouse as missing for singles
		replace educsp = . if single != .  
	
		// average birthyear within couples
		gen byav = byref if single !=.
		replace byav = (byref+bysp)/2 if couple == 1
		
		// average age within couples
		gen ageav = (ageref+agesp)/ 2 if couple == 1
		replace ageav = ageref if single != .		

		// restrict sample to (average) age 30 to 65
		keep if ageav >= 30 & ageav <=65	
		
******************************************************************
********* initial conditions on asset holdings (model input) *****
******************************************************************	

		preserve

		// for initial conditions: only work with one imputation (imputation 1)
		tostring Y1, replace
		g imp = substr(Y1,-1,.)
		keep if imp =="1"
		keep if ageav > 29 & ageav < 31   // only 30 years old
		
		// initial net worth allocation by marital status
		sum nw if single == 0 [aw=wgt_rv], det 
		sum nw if single == 1 [aw=wgt_rv], det
		sum nw if couple == 1 [aw=wgt_rv], det
		
		// initial asset allocation by marital status + education (more than 12th grade or not)
		sum nw  if single == 0 & educref < 8  [aw=wgt_rv]
		sum nw  if single == 0 & educref == 8 [aw=wgt_rv]
		
		sum nw  if single == 1 & educref < 8  [aw=wgt_rv]
		sum nw  if single == 1 & educref == 8 [aw=wgt_rv]
		
		sum nw  if couple == 1 & educref < 8 & educsp < 8  [aw=wgt_rv]
		sum nw  if couple == 1 & educref == 8 & educsp < 8 [aw=wgt_rv]
		sum nw  if couple == 1 & educref < 8 & educsp == 8 [aw=wgt_rv]
		sum nw  if couple == 1 & educref == 8 & educsp == 8 [aw=wgt_rv]
		
		restore

*************************************************
******** only keep necessary variables  *********
*************************************************

		keep peu_num YY1 Y1 sexref byref byav ageref agesp rev martref1 indus_ref occup_ref indus_sp occup_sp ///
				finc_wage finc_business finc_ntaxinv finc_interest finc_dividend finc_stocks ///
				finc_otherbusi finc_compen finc_childsup finc_socials finc_welfare finc_other ///
				educref educsp martref hh_num numkids numkids25 totinc_nocap totinc_nocapoth ///
				wave single couple safe risky finassets SMP share wgt_rv ///
				stocks risky2 safe2 finassets2 SMP2 share2 ageav hometype owntype*  hval* ///
				safe_nw housing  inher* share_fin share2_fin nw assets debt fam_type
				
*************************************************************
************* save dataset to carry out analysis ************
*************************************************************

		save dataset_analysis.dta, replace
